In [1]:
%matplotlib inline
import pandas as pd
import seaborn as sbn
sbn.set()

In [2]:
weather = pd.read_csv('weather.csv', index_col=0, na_values=[-99,-9900])
weather.date = pd.to_datetime(weather.date)
weather.pre = weather.pre / 100
weather.index = weather.date
del weather['date']

Snow per total precipitation per year


In [66]:
totalsnow = weather.groupby(weather.index.year).sum().snow
(totalsnow / (weather.groupby(weather.index.year).sum().pre + totalsnow)).plot(kind='bar')


Out[66]:
<matplotlib.axes._subplots.AxesSubplot at 0x1374876d8>

Snow in Spring or Fall / Rain in January


In [61]:
weather[(weather.snow > 0) & (weather.index.month < 11) & (weather.index.month > 3)]


Out[61]:
high low pre snow month weekOfMonth week
date
1951-04-01 36 28 0.10 1 0 0 40
1952-04-10 35 20 0.45 1 1 1 41
1952-04-13 36 30 0.50 5 1 1 41
1952-04-14 48 33 0.51 3 2 2 42
1956-04-29 38 29 0.92 2 4 4 44
1960-04-18 56 25 0.22 1 2 2 42
1961-04-17 42 25 0.13 2 2 2 42
1962-04-01 38 27 0.33 2 0 0 40
1962-04-02 40 12 0.10 1 0 0 40
1962-04-09 42 26 0.08 1 1 1 41
1962-04-12 47 32 0.31 1 1 1 41
1962-04-13 40 22 0.17 2 1 1 41
1971-04-03 35 16 0.03 1 0 0 40
1972-04-04 38 17 0.06 1 0 0 40
1972-04-08 35 7 0.14 2 1 1 41
1973-04-09 29 24 1.05 11 1 1 41
1973-04-10 34 20 1.99 3 1 1 41
1975-04-02 27 18 0.00 1 0 0 40
1975-04-09 38 27 0.12 3 1 1 41
1975-04-10 38 27 0.09 1 1 1 41
1979-04-02 38 28 0.44 5 0 0 40
1979-04-05 35 13 0.07 1 0 0 40
1980-04-03 41 32 0.16 1 0 0 40
1980-04-04 47 28 0.19 1 0 0 40
1980-04-09 33 28 0.56 3 1 1 41
1993-04-01 35 25 0.08 1 0 0 40
1993-04-15 37 32 0.35 1 2 2 42
1993-04-16 50 30 0.59 3 2 2 42
1993-04-20 49 31 1.12 3 2 2 42
1995-04-10 37 28 0.32 1 1 1 41
1996-04-14 41 28 0.10 1 2 2 42
1996-04-15 49 31 0.24 1 2 2 42
1996-04-28 61 37 0.00 1 4 4 44
1997-04-12 38 28 0.17 3 1 1 41
2002-04-01 31 20 0.04 1 0 0 40
2003-04-05 33 18 0.12 1 0 0 40
2003-04-07 40 26 0.36 6 1 1 41
2007-04-11 41 32 0.62 2 1 1 41
2007-04-12 42 28 0.05 1 1 1 41
2008-04-01 45 31 0.09 1 0 0 40
2011-04-16 47 31 0.64 2 2 2 42
2011-04-19 49 32 1.08 4 2 2 42
1972-10-18 33 25 0.03 2 2 2 102
1976-10-19 37 32 0.06 1 2 2 102

In [62]:
weather[(weather.pre > 0) & (weather.index.month == 1)]


Out[62]:
high low pre snow month weekOfMonth week
date
1894-01-05 22 16 0.22 3 0 0 10
1894-01-19 36 18 0.42 1 2 2 12
1894-01-20 41 20 0.14 0 2 2 12
1894-01-23 -2 -15 0.14 2 3 3 13
1894-01-28 23 0 0.22 3 4 4 14
1895-01-03 21 5 0.10 1 0 0 10
1895-01-14 18 -7 0.20 2 2 2 12
1895-01-18 29 9 0.15 2 2 2 12
1895-01-20 37 22 0.40 1 2 2 12
1895-01-24 14 4 0.40 5 3 3 13
1895-01-30 3 -16 0.20 2 4 4 14
1896-01-05 6 -16 0.05 1 0 0 10
1896-01-24 33 20 0.60 2 3 3 13
1897-01-03 42 21 0.96 2 0 0 10
1897-01-12 21 0 0.39 4 1 1 11
1897-01-16 33 28 0.31 1 2 2 12
1897-01-17 30 26 0.12 2 2 2 12
1897-01-23 5 -7 0.45 5 3 3 13
1898-01-12 33 26 0.70 2 1 1 11
1898-01-20 37 30 0.12 1 2 2 12
1899-01-05 8 -5 0.27 3 0 0 10
1899-01-22 34 19 0.08 1 3 3 13
1899-01-25 27 15 0.03 1 3 3 13
1900-01-01 20 2 0.04 1 0 0 10
1900-01-09 39 26 0.13 0 1 1 11
1900-01-13 32 25 0.10 1 1 1 11
1900-01-15 34 27 0.45 1 2 2 12
1900-01-17 33 28 0.34 1 2 2 12
1901-01-09 21 8 0.50 6 1 1 11
1901-01-26 40 21 0.20 1 3 3 13
... ... ... ... ... ... ... ...
2010-01-21 32 27 0.01 0 3 3 13
2010-01-22 32 28 0.01 0 3 3 13
2010-01-23 35 30 0.15 0 3 3 13
2010-01-24 36 30 0.48 0 3 3 13
2010-01-26 18 6 0.01 1 3 3 13
2011-01-05 24 -5 0.01 0 0 0 10
2011-01-11 25 20 0.13 2 1 1 11
2011-01-13 17 -7 0.01 0 1 1 11
2011-01-15 20 12 0.08 0 2 2 12
2011-01-17 29 13 0.25 3 2 2 12
2011-01-18 29 15 0.01 0 2 2 12
2011-01-19 16 3 0.01 0 2 2 12
2011-01-22 12 -1 0.04 1 3 3 13
2011-01-26 23 14 0.01 0 3 3 13
2011-01-27 30 17 0.02 1 3 3 13
2011-01-29 31 27 0.01 0 4 4 14
2011-01-30 28 21 0.01 0 4 4 14
2011-01-31 25 17 0.27 4 4 4 14
2012-01-12 37 11 0.22 4 1 1 11
2012-01-13 16 11 0.04 0 1 1 11
2012-01-17 35 15 0.03 1 2 2 12
2012-01-20 6 -8 0.38 6 2 2 12
2012-01-22 30 13 0.01 0 3 3 13
2012-01-23 32 25 0.12 1 3 3 13
2012-01-29 30 17 0.03 1 4 4 14
2013-01-11 42 33 0.19 0 1 1 11
2013-01-23 20 4 0.03 2 3 3 13
2013-01-27 31 23 0.31 0 3 3 13
2013-01-30 35 19 0.28 4 4 4 14
2013-01-31 19 2 0.01 0 4 4 14

915 rows × 7 columns

Highs and Lows


In [58]:
weather.groupby(weather.index.year).mean().high.plot()+weather.groupby(weather.index.year).mean().low.plot()


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-58-40a23dd4fdd4> in <module>()
----> 1 weather.groupby(weather.index.year).mean().high.plot()+weather.groupby(weather.index.year).mean().low.plot()

TypeError: unsupported operand type(s) for +: 'AxesSubplot' and 'AxesSubplot'

In [3]:
sbn.violinplot(x=weather.index.year//10*10,y=(weather.high+weather.low)/2)


Out[3]:
<matplotlib.axes._subplots.AxesSubplot at 0x1096e8cc0>

White Christmae


In [4]:
weather.sort_index(inplace=True)
snowonground = []
inches = 0
for idx,row in weather.iterrows():
    inches += row['snow']
    if row.high > 32:
        inches -= (row['high']-32)/4
    if inches < 0:
        inches = 0
    weather.set_value(idx,'totalsnow',inches)
    snowonground.append(inches)
    
weather.totalsnow = snowonground

In [5]:
%time
def accsnow(row):
    accsnow.inches += row.snow
    if row.high > 32 and accsnow.inches >= 0:
        accsnow.inches -= (row.high-32)*.25
    if accsnow.inches < 0:
        accsnow.inches = 0
    return accsnow.inches

accsnow.inches = 0

weather['totalsnow'] = weather.apply(accsnow,axis=1)


CPU times: user 3 µs, sys: 0 ns, total: 3 µs
Wall time: 6.2 µs

In [8]:
weather[(weather.index.month == 12) & (weather.index.day == 25)].totalsnow.plot(kind='bar')


Out[8]:
<matplotlib.axes._subplots.AxesSubplot at 0x127fff668>

How much snow per year?


In [33]:
d = {'totalsnow':[]}
df = pd.DataFrame(data=d)
for yr in range(1893,2013):
    df.loc[yr] = weather[weather.index.year==yr].snow.values.sum()

In [35]:
df.plot(kind='bar')


Out[35]:
<matplotlib.axes._subplots.AxesSubplot at 0x16d82ac88>

In [ ]:

Which Saturdays in the summer have the least amount of rain?


In [53]:
weather[(weather.index.dayofweek == 5) & (weather.index.month > 4) & (weather.index.month < 10)].groupby('week').mean().pre.plot('bar')


Out[53]:
<matplotlib.axes._subplots.AxesSubplot at 0x137833588>

Large Storms (consecutive days of rain)


In [68]:
def flood(row):
    if row.pre > 0:
        flood.days += 1
    else:
        flood.days = 0
    return flood.days

flood.days = 0
weather['daysOfRain'] = weather.apply(flood,axis=1)

In [75]:
weather.sort('daysOfRain',ascending=False).daysOfRain.head()


Out[75]:
date
1925-06-04    12
2001-05-07    12
1925-06-03    11
2001-05-06    11
1925-06-02    10
Name: daysOfRain, dtype: int64

Drought Periods


In [79]:
def drought(row):
    if row.pre == 0.0:
        drought.days += 1
    else:
        drought.days = 0
    return drought.days

drought.days = 0
weather['daysOfDrought'] = weather.apply(drought,axis=1)

In [83]:
weather.sort('daysOfDrought',ascending=False).daysOfDrought.head()


Out[83]:
date
1911-03-26    54
1911-03-25    53
1911-03-24    52
1911-03-23    51
1911-03-22    50
Name: daysOfDrought, dtype: int64

Day of maximum difference between high and low


In [17]:
w = weather
w['tempdiff'] = (w.high - w.low)

In [19]:
w[w.tempdiff == w.tempdiff.max()]


Out[19]:
high low pre snow tempdiff
date
1948-02-16 46 -32 0 0 78

Most Rainfall (in inches)


In [16]:
weather[weather.pre == weather.pre.max()]


Out[16]:
high low pre snow
date
1941-05-30 68 58 7.7 0

Most Snow (in inches)


In [6]:
weather[weather.snow == weather.snow.max()]


Out[6]:
high low pre snow
date
1971-01-04 24 6 1.15 15

Hottest/coldest day in record


In [4]:
weather[weather.high == weather.high.max()]


Out[4]:
high low pre snow
date
1936-07-14 111 71 0 0

In [15]:
weather[weather.low == weather.low.min()]


Out[15]:
high low pre snow
date
1951-01-30 2 -43 0 0